Remove Duplicates (same values) from Dictionary
Approach: iterate over the items and keep an entry only if its value has not already been kept.
Example: remove duplicates (same values) from a dictionary.
def remove_duplicates(D):
    """Return a new dict that keeps one key for each distinct value of ``D``.

    Entries are visited in the iteration order of ``D``; an entry is kept
    only when no equal value has been kept already, so the first key seen
    for each value wins.  Values are compared with ``==`` and therefore do
    not have to be hashable (nested dicts and lists work).

    The input dictionary is not modified.  The result references the same
    value objects as ``D`` (the values are not copied).
    """
    result = {}
    for k, v in D.items():
        # Linear scan over the values kept so far.  This is quadratic
        # overall, but unlike a set it also works for unhashable values.
        if v not in result.values():
            result[k] = v
    return result
Test:
# Sample records: 'id1' and 'id3' hold equal values, so one of them is a
# duplicate of the other.
student_data = {
    'id1': {
        'name': ['Sara'],
        'class': ['V'],
        'subject_integration': ['english, math, science'],
    },
    'id2': {
        'name': ['David'],
        'class': ['V'],
        'subject_integration': ['english, math, science'],
    },
    'id3': {
        'name': ['Sara'],
        'class': ['V'],
        'subject_integration': ['english, math, science'],
    },
    'id4': {
        'name': ['Surya'],
        'class': ['V'],
        'subject_integration': ['english, math, science'],
    },
}
print(remove_duplicates(student_data))
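Output (pretty-printed; the entry order shown comes from an older Python with arbitrary dict ordering — on Python 3.7+ the entries print in insertion order: id1, id2, id4):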
{'id2':
{'subject_integration': ['english, math, science'],
'class': ['V'],
'name': ['David']},
'id4':
{'subject_integration': ['english, math, science'],
'class': ['V'],
'name': ['Surya']
},
'id1':
{'subject_integration': ['english, math, science'],
'class': ['V'],
'name': ['Sara']
}
}
See also
https://www.w3resource.com/python-exercises/dictionary/python-data-type-dictionary-exercise-17.php
Another example
One simple approach is to build a reverse dictionary whose keys combine
the string data of each inner dictionary and whose values are the original keys.
So, say you have the data in a dictionary D:
import collections

# Sample data: the first two records are identical, the other two are unique.
D = {
    112762853378: {
        'dst': ['10.121.4.136'],
        'src': ['1.2.3.4'],
        'alias': ['www.example.com'],
    },
    112762853385: {
        'dst': ['10.121.4.136'],
        'src': ['1.2.3.4'],
        'alias': ['www.example.com'],
    },
    112760496444: {
        'dst': ['10.121.4.136'],
        'src': ['1.2.3.4'],
    },
    112760496502: {
        'dst': ['10.122.195.34'],
        'src': ['4.3.2.1'],
    },
}

# Reverse dictionary: record contents -> list of the keys of D that share them.
reverse_d = collections.defaultdict(list)
for key, inner_d in D.items():
    # Use a tuple of (field, first value) pairs as the lookup key.  Plainly
    # concatenating the strings would be ambiguous: 'a' + 'b' equals 'ab',
    # and a lone 'src' value would look the same as a lone 'alias' value.
    record_key = tuple(
        (k, inner_d[k][0]) for k in ['dst', 'src', 'alias'] if k in inner_d
    )
    reverse_d[record_key].append(key)

# Every group with more than one key is a set of duplicates.
duplicates = [keys for keys in reverse_d.values() if len(keys) > 1]
print(duplicates)  # [[112762853378, 112762853385]]
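If you don’t need the list of duplicates and just want a dictionary without duplicates,
store a single key per entry in the reverse dictionary (a regular dictionary is enough; no defaultdict is needed)
and then re-reverse it. Later keys overwrite earlier ones, so the last key of each duplicate group is kept: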
def remove_duplicates(D):
    """Return a new dict with one entry per distinct inner record of ``D``.

    Two inner dictionaries count as duplicates when they contain the same
    subset of the fields 'dst', 'src' and 'alias' and the first list element
    of each present field is equal.  Other fields and further list elements
    are ignored.

    When several keys share the same record, the key seen last is kept; the
    result is ordered by the first appearance of each distinct record.  The
    input is not modified and the inner dictionaries are not copied.

    Raises IndexError if one of the compared fields holds an empty list, and
    TypeError if a compared value is not hashable.
    """
    fields = ('dst', 'src', 'alias')
    reverse_d = {}
    for key, inner_d in D.items():
        # A tuple of (field, value) pairs is an unambiguous lookup key.
        # Concatenating the strings instead would merge distinct records,
        # e.g. dst='a', src='b' with dst='ab', or src='y' with alias='y'.
        record_key = tuple((k, inner_d[k][0]) for k in fields if k in inner_d)
        # Plain assignment: a later key overwrites an earlier duplicate.
        reverse_d[record_key] = key
    # Re-reverse: map each surviving key back to its original record.
    return {key: D[key] for key in reverse_d.values()}
# Deduplicate the sample dictionary D and print the result.
print(remove_duplicates(D))
Output:
{112762853385: {'dst': ['10.121.4.136'], 'src': ['1.2.3.4'], 'alias': ['www.example.com']},
112760496444: {'dst': ['10.121.4.136'], 'src': ['1.2.3.4']},
112760496502: {'dst': ['10.122.195.34'], 'src': ['4.3.2.1']}
}